In this markdown, we will do a comparison of the transcriptomes during development of Ptychodera and Branchiostoma floridae.
For this, we will use a solution of custom R functions and wrappers that we have named “comparABle”.
library(tidyverse)
library(stringr)
library(BiocGenerics)
library(EDASeq)
library(DESeq2)
library(tximport)
library(limma)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
require(data.table)
require(stringr)
require(tidytree)
require(ggtree)
require(rvcheck)
require(treeio)
require(dendextend)
require(phangorn)
require(phytools)
require(ComplexHeatmap)
require(foreach)
require(doParallel)
require(colorspace)
library(topGO)
source("code/r_code/r_functions/sourcefolder.R")
sourceFolder(
"code/r_code/r_functions",
recursive = TRUE
)
sourceFolder(
"code/r_code/r_general",
recursive = TRUE
)
We will first load the data of our species:
# Ptychodera analyses
load("outputs/rda/deseq2.rda")
# Amphioxus reanalysis
load("outputs/rda/bflo_reanalysis.rda")
For more functional annotation, we will use the gene age, COG functional categories, and the GO terms of Ptychodera.
load("outputs/rda/geneage.rda")
pfla_cogs <- read.table(
"outputs/functional_annotation/COGs/pfla_cogs.tsv",
col.names = c("id","cog")
)
# GO terms
pfla_id2go <-
readMappings(
"outputs/functional_annotation/go_blast2go/GO_annotation.txt"
)
rbh_pfla_bflo <- read.delim2("outputs/comparative/rbh/pfla_bflo_rbh.tsv", header = FALSE)
rbh_pfla_bflo[,2] <- gsub(" ","",rbh_pfla_bflo[,2])
rbh_pfla_bflo[,1] <- gsub("\\.p[0-9]+","",rbh_pfla_bflo[,1])
# Expression data, using all genes. No need for transformation
a = pfla_rna_counts
b = bflo_counts
a_samples = levels(condition_x)
b_samples = unique(sub("_.$", "", colnames(b)))
# Family/orthology data
o = unique(rbh_pfla_bflo)
# GOs
a_universe = rownames(vsd_allgen)
a_id2go = pfla_id2go
# Tidy up
samples_a = a_samples
a = qnorm(a)
## Loading required package: preprocessCore
a = tidyup(a, highlyvariable = FALSE) # remove genes with 0 tpms
a = rep2means(samples_a,a)
samples_b = b_samples
b = qnorm(b)
b = tidyup(b, highlyvariable = FALSE)
b = rep2means(samples_b,b) # remove genes with 0 tpms
o = pair2id(o)
# MERGE
merge_ab <- mergedata(a,b,o)
# CORRELATIONS
cors <- rawcorsp(merge_ab$a_o,merge_ab$b_o) # FIX JSD
# CORRELATIONS
plot_cors(cors)
We will follow by loading all the gene family data that we need. This is the gene/gfam lookup table (extracted from the OrthoXML output file from OMA) and the respective table to transform the gene indexes into the actual gene id of every species.
gene_gfam <- read.delim2("outputs/comparative/20240404_oma/oma_omaid_gfam.tsv", header = TRUE)
colnames(gene_gfam) <- c("id","gfam")
gene_gfam$id <- gsub("\\.p[0-9]+","",gene_gfam$id)
oma_pfla_bflo <- read.delim2("outputs/comparative/20240404_oma/one_to_one_orthologues/1to1_pfla_bflo.tsv", header = FALSE)
oma_pfla_bflo$V2 <- gsub(" ","",oma_pfla_bflo$V2)
load("outputs/rda/geneage.rda")
For more functional annotation, we will use the COG functional categories and the GO terms of Ptychodera.
pfla_cogs <- read.table(
"outputs/functional_annotation/COGs/pfla_cogs.tsv",
col.names = c("id","cog")
)
# GO terms
pfla_id2go <-
readMappings(
"outputs/functional_annotation/go_blast2go/GO_annotation.txt"
)
We will first compare the transcriptomes and stage-specific clusters of Ptychodera and Amphioxus. For this, we will define a number of objects that will enter into our custom wrapper function comparABle. The most important of them being:
In addition, we will also pass it a list of gene ontologies, a COG functional category association file, and a list of gene ages that are shared between species A and B.
# Expression data
a = pfla_rna_counts
b = bflo_counts
# Samples for rowmeans_by repl
a_samples = levels(condition_x)
b_samples = unique(sub("_.$", "", colnames(b)))
# Family/orthology data
o = unique(oma_pfla_bflo)
f = gene_gfam[grep("TCONS|bflo",gene_gfam$id),]
# Module/Cluster information
ma = data.frame(
id = rownames(pfla_rna_dev),
module = pfla_rna_dev$cID
)
mb = bflo_cl
colnames(mb) <- c("id","module")
# Gene Age
ga = pfla_age[,c(1,3)]
colnames(ga) = c("id","age")
# COG
cog_a <- pfla_cogs
# GOs
a_universe = rownames(vsd_allgen)
a_id2go = pfla_id2go
# Common Evo Nodes
common_evo_nodes = unique(ga$age)[!(unique(ga$age) %in% c("6_ambulacraria","7_hemichordata","8_Pfla"))]
Below we will run the comparABle wrapper. This can take some time.
PFLA_BFLO_COMPARISON <- comparABle(
a_name = "P.flava",
b_name = "B.floridae",
a = a,
b = b,
o = o,
f = f,
ma = ma,
mb = mb,
ga = ga,
gb = gb,
cog_a = cog_a,
cog_b = cog_b,
a_samples = a_samples,
b_samples = b_samples,
highlyvariable = FALSE,
cooc_p = 0.05,
cooc_h = c(0.70,0.95),
cooc_cor_method = "pearson",
a_universe = a_universe,
a_id2go = a_id2go,
common_evo_nodes = common_evo_nodes,
sep = ",\ "
)
## [1] "Tidy up data"
## [1] "Merge data"
## [1] "Correlations"
## [1] "PCA"
## [1] "Co-Occurrence"
## [1] "Common genes in Correlations"
## The top five similar pair of stages are: 18.0721 18.47827 18.58822 18.62579 18.71676
## [1] "Subsetting count matrices"
## [1] "Model fitting and top genes"
## [1] "Common genes in Correlations (GO)"
## [1] "Starting analysis 1 of 5"
## [1] "Starting analysis 2 of 5"
## [1] "Starting analysis 3 of 5"
## [1] "Starting analysis 4 of 5"
## [1] "Starting analysis 5 of 5"
## [1] "Common genes in Correlations (age)"
## [1] "Pairwise Orthology Overlap Strategy across modules -- hypergeometric and binonmial tests"
## [1] "Getting info on genes from shared families across modules"
## [1] "Starting analysis 1 of 20"
## [1] "Starting analysis 2 of 20"
## [1] "Starting analysis 3 of 20"
## [1] "Starting analysis 4 of 20"
## [1] "Starting analysis 5 of 20"
## [1] "Starting analysis 6 of 20"
## [1] "Starting analysis 7 of 20"
## [1] "Starting analysis 8 of 20"
## [1] "Starting analysis 9 of 20"
## [1] "Starting analysis 10 of 20"
## [1] "Starting analysis 11 of 20"
## [1] "Starting analysis 12 of 20"
## [1] "Starting analysis 13 of 20"
## [1] "Starting analysis 14 of 20"
## [1] "Starting analysis 15 of 20"
## [1] "Starting analysis 16 of 20"
## [1] "Starting analysis 17 of 20"
## [1] "Starting analysis 18 of 20"
## [1] "Starting analysis 19 of 20"
## [1] "Starting analysis 20 of 20"
## [1] "Plots"
## [1] "Generating results"
plot_cors(PFLA_BFLO_COMPARISON$pairwise_correlations)
# Common genes, highly correlated
p1 <- plot_hicor_genes(
scatter = PFLA_BFLO_COMPARISON$high_corr_genes$pairwise_data$hicor_topgenes$cor_08_To__09_L2$plot_topgenes,
GO_plot = PFLA_BFLO_COMPARISON$high_corr_genes$GOs$GOplot$cor_08_To__09_L2,
age_table = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_08_To__09_L2$AgeperModule,
age_enrichemnt_hm = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_08_To__09_L2$heatmap
)
p2 <- plot_hicor_genes(
scatter = PFLA_BFLO_COMPARISON$high_corr_genes$pairwise_data$hicor_topgenes$cor_07_LG__08_L1$plot_topgenes,
GO_plot = PFLA_BFLO_COMPARISON$high_corr_genes$GOs$GOplot$cor_07_LG__08_L1,
age_table = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_07_LG__08_L1$AgeperModule,
age_enrichemnt_hm = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_07_LG__08_L1$heatmap
)
p3 <- plot_hicor_genes(
scatter = PFLA_BFLO_COMPARISON$high_corr_genes$pairwise_data$hicor_topgenes$cor_08_To__08_L1$plot_topgenes,
GO_plot = PFLA_BFLO_COMPARISON$high_corr_genes$GOs$GOplot$cor_08_To__08_L1,
age_table = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_08_To__08_L1$AgeperModule,
age_enrichemnt_hm = PFLA_BFLO_COMPARISON$high_corr_genes$age$cor_08_To__08_L1$heatmap
)
plot_grid(p1,p2,p3,ncol = 1)
Heatmap(
name = "co-occurrence",
PFLA_BFLO_COMPARISON$coocurrence_analysis$cooccurrence[1:16,17:28],
cluster_rows = F,
cluster_columns = F,
col = sequential_hcl(10,"YlOrRd", rev = TRUE)
)
PFLA_BFLO_COMPARISON$plots$orthology_overlap_binomial_hm+
PFLA_BFLO_COMPARISON$plots$orthology_overlap_hypgeom_hm
And here the combination of gene age and functional categories enrichment:
PFLA_BFLO_COMPARISON$orthology_overlap_modules$
genes_in_common_fams$commonfams$
age_a_common$heatmap +
PFLA_BFLO_COMPARISON$orthology_overlap_modules$
genes_in_common_fams$commonfams$
cog_a_comon$heatmap
save(
PFLA_BFLO_COMPARISON,
file = "outputs/rda/species_comparison_pfla_bflo.rda"
)